/* Copyright (C) 2000-2002 Lavtech.com corp. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h> 
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
              
#include "udm_config.h"
#include "udmsearch.h"

#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

/* Directory component appended to the configured vardir when building the
   paths of the generated .dat/.ind index files. */
#define TREEDIR "tree"

/* Print the program banner and the list of command line options to stderr. */
static void usage(void)
{
	fprintf(stderr,
		"\n"
		"mkind from %s-%s-%s\n"
		"http://search.mnogo.ru/ (C) 1998-2002, LavTech Corp.\n"
		"\n"
		"Usage: mkind [OPTIONS] [config file]\n"
		"\n"
		"Options are:\n"
		"  -c		create CATEGORY index\n"
		"  -t		create TAG index\n"
		"  -h		create TIME (hour) index\n"
		"  -m		create TIME (min) index\n"
		"  -u		create HOST (URL) index\n"
		"  -l		create LANGUAGE index\n"
		"\n"
		"Please post bug reports and suggestions at http://www.mnogosearch.org/bugs\n",
		PACKAGE, VERSION, UDM_DBTYPE);
}

/* Path of the configuration file handed to UdmLoadConfig(); starts out as
   indexer.conf inside UDM_CONF_DIR and is replaced by the positional
   command line argument when main() receives one. */
const char *config_name= UDM_CONF_DIR UDMSLASHSTR "indexer.conf";

/*
 * Three-way comparison of two UDM_UINT8_URLID records, used as the qsort()
 * callback in MakeNestedIndex(). Records are ordered by `hi`, then by `lo`,
 * then by `url_id`; every field is compared as an unsigned uint4 value.
 * Returns -1, 0 or 1.
 */
static int cmp_ind8(const UDM_UINT8_URLID *c1,const UDM_UINT8_URLID *c2){
	uint4 left,right;

	left=c1->hi; right=c2->hi;
	if(left!=right) return (left<right) ? -1 : 1;

	left=c1->lo; right=c2->lo;
	if(left!=right) return (left<right) ? -1 : 1;

	left=c1->url_id; right=c2->url_id;
	if(left!=right) return (left<right) ? -1 : 1;

	return 0;
}

/*
 * Three-way comparison of two UDM_UINT4_URLID records, used as the qsort()
 * callback in MakeLinearIndex(). Records are ordered by `val` and, for equal
 * values, by `url_id`; both fields are compared as unsigned uint4 values.
 * Returns -1, 0 or 1.
 */
static int cmp_ind4(const UDM_UINT4_URLID *c1,const UDM_UINT4_URLID *c2){
	uint4 left,right;

	left=c1->val; right=c2->val;
	if(left!=right) return (left<right) ? -1 : 1;

	left=c1->url_id; right=c2->url_id;
	if(left!=right) return (left<right) ? -1 : 1;

	return 0;
}

/*
 * Build the index keyed by a (hi,lo) pair for `field` and write it to
 * <vardir>TREEDIR<UDMSLASH><lim_name>.dat and ...<lim_name>.ind.
 *
 * The records returned by UdmLimit8() are sorted with cmp_ind8(). The .dat
 * file receives the url_id column in sorted order; the .ind file receives one
 * UDM_UINT8_POS_LEN record per distinct (hi,lo) key, holding the byte offset
 * and byte length of that key's run of url_ids inside the .dat file.
 * Every index record is also dumped to stdout.
 *
 * Agent, field, type and db are passed through to UdmLimit8() unchanged.
 *
 * Returns 0 on success and 1 on failure; the reason is printed to stderr.
 */
static int MakeNestedIndex(UDM_AGENT *Agent,const char *field,const char *lim_name,int type,void *db){
	UDM_UINT8_URLID	*pairs=NULL;
	int	found=0;
	size_t	count;
	size_t	k,prev;
	size_t	bytes;
	uint4	*data=NULL;
	UDM_UINT8_POS_LEN *ind=NULL;
	size_t	mind=1000,nind=0;
	char	fname[1024];
	int	plen;
	/* -1 marks "not open": 0 is a valid descriptor and must not be used as a flag */
	int	dat_fd=-1, ind_fd=-1;
	
	/* UdmLimit8() reports the count through an int*, so receive it in a real
	   int instead of aliasing a wider, uninitialised size_t. */
	pairs=UdmLimit8(Agent,field,&found,type,db);
	if(UdmEnvErrCode(Agent->Conf))fprintf (stderr,"Error: %s\n",UdmEnvErrMsg(Agent->Conf));
	if(!pairs)return(1);
	count=(found>0)?(size_t)found:0;
	
	qsort(pairs,count,sizeof(UDM_UINT8_URLID),(qsort_cmp)cmp_ind8);
	
	/* Ask for at least one element so an empty result is not mistaken for
	   an allocation failure where malloc(0) returns NULL. */
	data=(uint4*)malloc((count?count:1)*sizeof(*data));
	if(!data){
		fprintf(stderr,"Error1: %s\n",strerror(errno));
		goto err1;
	}
	ind=(UDM_UINT8_POS_LEN*)malloc(mind*sizeof(UDM_UINT8_POS_LEN));
	if(!ind){
		fprintf(stderr,"Error2: %s\n",strerror(errno));
		goto err1;
	}
	
	/* `prev` is the first element of the current run of equal (hi,lo) keys. */
	prev=0;
	for(k=0;k<count;k++){
		data[k]=pairs[k].url_id;
		
		/* The run continues while the next element carries the same key.
		   Closing a run at its own last element (instead of at the first
		   element of the following run) gives a trailing single-element
		   run its own index record. */
		if((k+1<count) && (pairs[k+1].hi==pairs[prev].hi) && (pairs[k+1].lo==pairs[prev].lo))continue;
		
		if(nind==mind){
			/* Grow through a temporary so `ind` is still freed on failure */
			UDM_UINT8_POS_LEN *grown=(UDM_UINT8_POS_LEN*)realloc(ind,(mind+1000)*sizeof(UDM_UINT8_POS_LEN));
			if(!grown){
				fprintf(stderr,"Error3: %s\n",strerror(errno));
				goto err1;
			}
			ind=grown;
			mind+=1000;
		}
		/* Fill index */
		ind[nind].hi=pairs[prev].hi;
		ind[nind].lo=pairs[prev].lo;
		ind[nind].pos=prev*sizeof(*data);
		ind[nind].len=(k-prev+1)*sizeof(*data);
		printf("%08X%08X - %d %d\n",ind[nind].hi,ind[nind].lo,(int)ind[nind].pos,ind[nind].len);
		nind++;
		
		prev=k+1;
	}
	UDM_FREE(pairs);
	
	plen=snprintf(fname,sizeof(fname),"%s%s%c%s.dat",Agent->Conf->vardir,TREEDIR,UDMSLASH,lim_name);
	if(plen<0 || (size_t)plen>=sizeof(fname)){
		fprintf(stderr,"File name for '%s.dat' is too long\n",lim_name);
		goto err1;
	}
	if((dat_fd=open(fname,O_CREAT|O_WRONLY|O_TRUNC|UDM_BINARY,UDM_IWRITE))<0){
		fprintf(stderr,"Can't open '%s': %s\n",fname,strerror(errno));
		goto err1;
	}
	bytes=count*sizeof(*data);
	/* A failed (-1) or short write both compare unequal to `bytes` */
	if(bytes!=(size_t)write(dat_fd,data,bytes)){
		fprintf(stderr,"Can't write '%s': %s\n",fname,strerror(errno));
		goto err1;
	}
	close(dat_fd);
	dat_fd=-1;	/* prevents a second close() if a later step fails */
	UDM_FREE(data);

	plen=snprintf(fname,sizeof(fname),"%s%s%c%s.ind",Agent->Conf->vardir,TREEDIR,UDMSLASH,lim_name);
	if(plen<0 || (size_t)plen>=sizeof(fname)){
		fprintf(stderr,"File name for '%s.ind' is too long\n",lim_name);
		goto err1;
	}
	if((ind_fd=open(fname,O_CREAT|O_WRONLY|O_TRUNC|UDM_BINARY,UDM_IWRITE))<0){
		fprintf(stderr,"Can't open '%s': %s\n",fname,strerror(errno));
		goto err1;
	}
	bytes=nind*sizeof(UDM_UINT8_POS_LEN);
	if(bytes!=(size_t)write(ind_fd,ind,bytes)){
		fprintf(stderr,"Can't write '%s': %s\n",fname,strerror(errno));
		goto err1;
	}
	close(ind_fd);
	ind_fd=-1;
	UDM_FREE(ind);
	
	return(0);
	
err1:
	UDM_FREE(pairs);
	UDM_FREE(data);
	UDM_FREE(ind);
	if(dat_fd>=0)close(dat_fd);
	if(ind_fd>=0)close(ind_fd);
	return(1);
}

/*
 * Build the index keyed by a single value for `field` and write it to
 * <vardir>TREEDIR<UDMSLASH><lim_name>.dat and ...<lim_name>.ind.
 *
 * The records returned by UdmLimit4() are sorted with cmp_ind4(). The .dat
 * file receives the url_id column in sorted order; the .ind file receives one
 * UDM_UINT4_POS_LEN record per distinct `val`, holding the byte offset and
 * byte length of that value's run of url_ids inside the .dat file.
 *
 * Agent, field, type and db are passed through to UdmLimit4() unchanged.
 *
 * Returns 0 on success and 1 on failure; the reason is printed to stderr.
 */
static int MakeLinearIndex(UDM_AGENT *Agent,const char *field,const char *lim_name,int type,void *db){
	UDM_UINT4_URLID	*pairs=NULL;
	int	found=0;
	size_t	count;
	size_t	k,prev;
	size_t	bytes;
	uint4	*data=NULL;
	UDM_UINT4_POS_LEN *ind=NULL;
	size_t	mind=1000,nind=0;
	char	fname[1024];
	int	plen;
	/* -1 marks "not open": 0 is a valid descriptor and must not be used as a flag */
	int	dat_fd=-1, ind_fd=-1;
	
	/* UdmLimit4() reports the count through an int*, so receive it in a real
	   int instead of aliasing a wider, uninitialised size_t. */
	pairs=UdmLimit4(Agent,field,&found,type,db);
	if(UdmEnvErrCode(Agent->Conf))fprintf (stderr,"Error: %s\n",UdmEnvErrMsg(Agent->Conf));
	if(!pairs)return(1);
	count=(found>0)?(size_t)found:0;
	
	qsort(pairs,count,sizeof(UDM_UINT4_URLID),(qsort_cmp)cmp_ind4);
	
	/* Ask for at least one element so an empty result is not mistaken for
	   an allocation failure where malloc(0) returns NULL. */
	data=(uint4*)malloc((count?count:1)*sizeof(*data));
	if(!data){
		fprintf(stderr,"Error1: %s\n",strerror(errno));
		goto err1;
	}
	ind=(UDM_UINT4_POS_LEN*)malloc(mind*sizeof(UDM_UINT4_POS_LEN));
	if(!ind){
		fprintf(stderr,"Error2: %s\n",strerror(errno));
		goto err1;
	}
	
	/* `prev` is the first element of the current run of equal values. */
	prev=0;
	for(k=0;k<count;k++){
		data[k]=pairs[k].url_id;
		
		/* The run continues while the next element carries the same value.
		   Closing a run at its own last element (instead of at the first
		   element of the following run) gives a trailing single-element
		   run its own index record. */
		if((k+1<count) && (pairs[k+1].val==pairs[prev].val))continue;
		
		if(nind==mind){
			/* Grow through a temporary so `ind` is still freed on failure */
			UDM_UINT4_POS_LEN *grown=(UDM_UINT4_POS_LEN*)realloc(ind,(mind+1000)*sizeof(UDM_UINT4_POS_LEN));
			if(!grown){
				fprintf(stderr,"Error3: %s\n",strerror(errno));
				goto err1;
			}
			ind=grown;
			mind+=1000;
		}
		/* Fill index */
		ind[nind].val=pairs[prev].val;
		ind[nind].pos=prev*sizeof(*data);
		ind[nind].len=(k-prev+1)*sizeof(*data);
		nind++;
		
		prev=k+1;
	}
	UDM_FREE(pairs);
	
	plen=snprintf(fname,sizeof(fname),"%s%s%c%s.dat",Agent->Conf->vardir,TREEDIR,UDMSLASH,lim_name);
	if(plen<0 || (size_t)plen>=sizeof(fname)){
		fprintf(stderr,"File name for '%s.dat' is too long\n",lim_name);
		goto err1;
	}
	if((dat_fd=open(fname,O_CREAT|O_WRONLY|O_TRUNC|UDM_BINARY,UDM_IWRITE))<0){
		fprintf(stderr,"Can't open '%s': %s\n",fname,strerror(errno));
		goto err1;
	}
	bytes=count*sizeof(*data);
	/* A failed (-1) or short write both compare unequal to `bytes` */
	if(bytes!=(size_t)write(dat_fd,data,bytes)){
		fprintf(stderr,"Can't write '%s': %s\n",fname,strerror(errno));
		goto err1;
	}
	close(dat_fd);
	dat_fd=-1;	/* prevents a second close() if a later step fails */
	UDM_FREE(data);

	plen=snprintf(fname,sizeof(fname),"%s%s%c%s.ind",Agent->Conf->vardir,TREEDIR,UDMSLASH,lim_name);
	if(plen<0 || (size_t)plen>=sizeof(fname)){
		fprintf(stderr,"File name for '%s.ind' is too long\n",lim_name);
		goto err1;
	}
	if((ind_fd=open(fname,O_CREAT|O_WRONLY|O_TRUNC|UDM_BINARY,UDM_IWRITE))<0){
		fprintf(stderr,"Can't open '%s': %s\n",fname,strerror(errno));
		goto err1;
	}
	bytes=nind*sizeof(UDM_UINT4_POS_LEN);
	if(bytes!=(size_t)write(ind_fd,ind,bytes)){
		fprintf(stderr,"Can't write '%s': %s\n",fname,strerror(errno));
		goto err1;
	}
	close(ind_fd);
	ind_fd=-1;
	UDM_FREE(ind);
	
	return(0);
	
err1:
	UDM_FREE(pairs);
	UDM_FREE(data);
	UDM_FREE(ind);
	if(dat_fd>=0)close(dat_fd);
	if(ind_fd>=0)close(ind_fd);
	return(1);
}

/*
 * Entry point: parse the option selecting which index to build, load the
 * configuration file and run the matching index builder.
 *
 * Options: -c category (the default), -t tag, -h time (hour), -m time (min),
 * -u host, -l language; -? prints the usage text. An optional positional
 * argument replaces the default configuration file name.
 *
 * Returns 0 on success (or after -?), 1 on a usage error, a configuration
 * or initialisation failure, or when the index builder reports an error.
 */
int main(int argc,char **argv){
	int ch,type=0;
	int rc=0;
	UDM_ENV *Env;
	UDM_AGENT *Agent;

	while ((ch = getopt(argc, argv, "cthmul?")) != -1){
		switch (ch) {
			case 'c': type=0; break;
			case 't': type=1; break;
			case 'h': type=2; break;
			case 'm': type=3; break;
			case 'u': type=4; break;
			case 'l': type=5; break;
		        case '?': usage(); return 0;
		}
	}
	argc -= optind;argv += optind;
	/* Reject more than one positional argument, and the case where getopt()
	   did not advance past argv[0] and nothing positional remains. */
	if(argc > 1 || (argc == 0 && optind == 1)){
		usage();
		return(1);
	}
	if(argc == 1) config_name = argv[0];
	
	
	Env=UdmEnvInit(NULL);
	/* Guard against a failed initialisation before Env is used */
	if(!Env){
		fprintf(stderr,"Can't initialize environment\n");
		return(1);
	}
	if(UDM_OK!=UdmLoadConfig(Env,config_name,0,0)){
		fprintf(stderr,"%s\n",UdmEnvErrMsg(Env));
		UdmEnvFree(Env);
		return(1);
	}
	Agent=UdmAgentInit(NULL, Env, 0);
	/* Agent->Conf is dereferenced below, so a NULL agent must stop here */
	if(!Agent){
		fprintf(stderr,"Can't initialize agent\n");
		UdmEnvFree(Env);
		return(1);
	}

	/* Keep the builder's status so a failed build is visible in the exit code */
	switch (type) {
		case 0:	rc=MakeNestedIndex(Agent,"category","lim_cat",0,Agent->Conf->db);
			break;	
		case 1:	rc=MakeLinearIndex(Agent,"tag","lim_tag",3,Agent->Conf->db);
			break;
		case 2:	rc=MakeLinearIndex(Agent,"last_mod_time","lim_hour",0,Agent->Conf->db);
			break;
		case 3:	rc=MakeLinearIndex(Agent,"last_mod_time","lim_min",1,Agent->Conf->db);
			break;
		case 4:	rc=MakeLinearIndex(Agent,"url","lim_host",2,Agent->Conf->db);
			break;
		case 5:	rc=MakeLinearIndex(Agent, "lang", "lim_lang", 3,Agent->Conf->db);
			break;
		default:
			/* not reachable: type is only ever set to 0..5 above */
			break;
	}
	
	UdmAgentFree(Agent);
	UdmEnvFree(Env);
	return(rc);
}
